library(corrplot) # Pour la visualisation des corrélations
library(DataExplorer) # Pour l'exploration des données
library(dygraphs) # Pour les graphiques dygraph
library(GGally) # Pour des visualisations avancées
library(lubridate) # Pour les dates
library(naniar) # Pour l'identification des données manquantes
library(skimr) # Pour une description globale des données
library(tidyverse) # Pour l'environnement éponyme (incluant ggplot2)
library(xts) # Pour les objets xts

1 Traitement des données

1.1 Importation des données

# Read the semicolon-delimited measurement table (one row per ID) and
# preview its first rows.
dataX <- read_delim(
  file = "engieX.csv",
  delim = ";"
)
dataX %>%
  head()
## # A tibble: 6 × 78
##      ID MAC_CODE Date_time Pitch_angle Pitch_angle_min Pitch_angle_max
##   <dbl> <chr>        <dbl>       <dbl>           <dbl>           <dbl>
## 1     1 WT3              1        92.5            92.5            92.5
## 2     2 WT3              2        92.5            92.5            92.5
## 3     3 WT3              3        92.5            92.5            92.5
## 4     4 WT3              4        92.5            92.5            92.5
## 5     5 WT3              5        92.5            92.5            92.5
## 6     6 WT3              6        92.5            92.5            92.5
## # ℹ 72 more variables: Pitch_angle_std <dbl>, Hub_temperature <dbl>,
## #   Hub_temperature_min <dbl>, Hub_temperature_max <dbl>,
## #   Hub_temperature_std <dbl>, Generator_converter_speed <dbl>,
## #   Generator_converter_speed_min <dbl>, Generator_converter_speed_max <dbl>,
## #   Generator_converter_speed_std <dbl>, Generator_speed <dbl>,
## #   Generator_speed_min <dbl>, Generator_speed_max <dbl>,
## #   Generator_speed_std <dbl>, Generator_bearing_1_temperature <dbl>, …
# Read the semicolon-delimited table holding ID and TARGET, then preview
# its first rows.
dataY <- read_delim(
  file = "engieY.csv",
  delim = ";"
)
dataY %>%
  head()
## # A tibble: 6 × 2
##      ID TARGET
##   <dbl>  <dbl>
## 1     1 -0.703
## 2     2 -0.747
## 3     3 -0.791
## 4     4 -0.736
## 5     5 -1.06 
## 6     6 -0.373

On effectue une jointure interne des deux tables sur la variable ID, variable que l’on retire ensuite :

# Inner-join the TARGET table (dataY) with the measurement table (dataX)
# on ID, then drop the ID column from the merged result.
data <- inner_join(dataY, dataX, by = "ID") %>%
  select(-ID)

# Compact listing of every column of the merged table with its type.
str(data)
## tibble [617,386 × 78] (S3: tbl_df/tbl/data.frame)
##  $ TARGET                             : num [1:617386] -0.703 -0.747 -0.791 -0.736 -1.055 ...
##  $ MAC_CODE                           : chr [1:617386] "WT3" "WT3" "WT3" "WT3" ...
##  $ Date_time                          : num [1:617386] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Pitch_angle                        : num [1:617386] 92.5 92.5 92.5 92.5 92.5 ...
##  $ Pitch_angle_min                    : num [1:617386] 92.5 92.5 92.5 92.5 92.5 ...
##  $ Pitch_angle_max                    : num [1:617386] 92.5 92.5 92.5 92.5 92.5 ...
##  $ Pitch_angle_std                    : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Hub_temperature                    : num [1:617386] 7 7 7 6.97 6.93 ...
##  $ Hub_temperature_min                : num [1:617386] 7 7 7 6.7 6 ...
##  $ Hub_temperature_max                : num [1:617386] 7 7 7 7 7 ...
##  $ Hub_temperature_std                : num [1:617386] 0 0 0 0.02 0.17 ...
##  $ Generator_converter_speed          : num [1:617386] 3.38 3.56 3.51 4.06 3.61 ...
##  $ Generator_converter_speed_min      : num [1:617386] 3.33 3.33 3.33 3.35 3.35 ...
##  $ Generator_converter_speed_max      : num [1:617386] 3.42 3.43 11.07 6.98 13.77 ...
##  $ Generator_converter_speed_std      : num [1:617386] 0.01 0.01 1.06 0.74 2.04 ...
##  $ Generator_speed                    : num [1:617386] 0.17 0.28 0.08 0.41 0.21 ...
##  $ Generator_speed_min                : num [1:617386] 0 -0.43 -0.56 0 0 ...
##  $ Generator_speed_max                : num [1:617386] 7.72 6.23 8.1 5.11 10.39 ...
##  $ Generator_speed_std                : num [1:617386] 0.91 0.91 0.78 0.55 1.6 ...
##  $ Generator_bearing_1_temperature    : num [1:617386] 7.5 7.5 7.5 7.5 7.5 ...
##  $ Generator_bearing_1_temperature_min: num [1:617386] 7.5 7.5 7.5 7.5 7.5 ...
##  $ Generator_bearing_1_temperature_max: num [1:617386] 7.5 7.5 7.5 7.5 7.5 ...
##  $ Generator_bearing_1_temperature_std: num [1:617386] 0 0 0 0 0 ...
##  $ Generator_bearing_2_temperature    : num [1:617386] 7.17 7.2 7.26 7.26 7.25 ...
##  $ Generator_bearing_2_temperature_min: num [1:617386] 7.1 7.1 7.1 7.1 7.1 ...
##  $ Generator_bearing_2_temperature_max: num [1:617386] 7.3 7.3 7.3 7.3 7.3 ...
##  $ Generator_bearing_2_temperature_std: num [1:617386] 0.06 0.07 0.07 0.05 0.06 ...
##  $ Generator_stator_temperature       : num [1:617386] 4.16 4.17 4.17 4.15 4.14 ...
##  $ Generator_stator_temperature_min   : num [1:617386] 4.1 4.1 4.1 4.1 4.1 ...
##  $ Generator_stator_temperature_max   : num [1:617386] 4.2 4.2 4.2 4.2 4.2 ...
##  $ Generator_stator_temperature_std   : num [1:617386] 0.04 0.03 0.03 0.03 0.04 ...
##  $ Gearbox_bearing_1_temperature      : num [1:617386] 2.56 2.57 2.57 2.53 2.52 ...
##  $ Gearbox_bearing_1_temperature_min  : num [1:617386] 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
##  $ Gearbox_bearing_1_temperature_max  : num [1:617386] 2.7 2.7 2.7 2.7 2.7 ...
##  $ Gearbox_bearing_1_temperature_std  : num [1:617386] 0.06 0.07 0.07 0.06 0.04 ...
##  $ Gearbox_bearing_2_temperature      : num [1:617386] 2.17 2.18 2.19 2.19 2.18 ...
##  $ Gearbox_bearing_2_temperature_min  : num [1:617386] 2 2 2 2 2 2 2 2 2 2 ...
##  $ Gearbox_bearing_2_temperature_max  : num [1:617386] 2.2 2.2 2.2 2.2 2.2 2.2 2.2 2.2 2.2 2.2 ...
##  $ Gearbox_bearing_2_temperature_std  : num [1:617386] 0.05 0.04 0.04 0.04 0.04 ...
##  $ Gearbox_inlet_temperature          : num [1:617386] 2 2.11 2.2 2.2 2.21 ...
##  $ Gearbox_inlet_temperature_min      : num [1:617386] 1.95 2 2 2.2 2.2 2.2 2.2 2.25 2.3 2.25 ...
##  $ Gearbox_inlet_temperature_max      : num [1:617386] 2 2.2 2.2 2.2 2.3 2.3 2.3 2.3 2.3 2.3 ...
##  $ Gearbox_inlet_temperature_std      : num [1:617386] 0 0.07 0.03 0 0.02 ...
##  $ Gearbox_oil_sump_temperature       : num [1:617386] 1.8 1.8 1.8 1.8 1.8 1.78 1.78 1.8 1.8 1.8 ...
##  $ Gearbox_oil_sump_temperature_min   : num [1:617386] 1.75 1.8 1.75 1.8 1.75 1.7 1.7 1.7 1.75 1.8 ...
##  $ Gearbox_oil_sump_temperature_max   : num [1:617386] 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 ...
##  $ Gearbox_oil_sump_temperature_std   : num [1:617386] 0.01 0 0 0 0 ...
##  $ Nacelle_angle                      : num [1:617386] 294 294 294 294 294 ...
##  $ Nacelle_angle_min                  : num [1:617386] 294 294 294 294 294 ...
##  $ Nacelle_angle_max                  : num [1:617386] 294 294 294 294 294 ...
##  $ Nacelle_angle_std                  : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Nacelle_temperature                : num [1:617386] 5.72 5.62 5.41 5.3 5.34 ...
##  $ Nacelle_temperature_min            : num [1:617386] 5.7 5.6 5.4 5.3 5.1 ...
##  $ Nacelle_temperature_max            : num [1:617386] 5.9 5.7 5.6 5.4 5.4 ...
##  $ Nacelle_temperature_std            : num [1:617386] 0.1 0.05 0.09 0.04 0.08 ...
##  $ Absolute_wind_direction            : num [1:617386] 294 298 322 319 315 ...
##  $ Outdoor_temperature                : num [1:617386] -1.1 -1.08 -1.01 -1 -1 -1.04 -1.09 -1.1 -1.1 -1.1 ...
##  $ Outdoor_temperature_min            : num [1:617386] -1.1 -1.1 -1.1 -1 -1 -1.1 -1.1 -1.1 -1.1 -1.1 ...
##  $ Outdoor_temperature_max            : num [1:617386] -1.1 -1 -1 -1 -0.9 ...
##  $ Outdoor_temperature_std            : num [1:617386] 0 0.02 0.03 0 0 ...
##  $ Grid_frequency                     : num [1:617386] 50 50 50 50 50 ...
##  $ Grid_frequency_min                 : num [1:617386] 50 49.9 49.9 50 50 ...
##  $ Grid_frequency_max                 : num [1:617386] 50 50 50 50 50 ...
##  $ Grid_frequency_std                 : num [1:617386] 0.01 0.02 0.01 0.01 0.01 ...
##  $ Grid_voltage                       : num [1:617386] 684 686 688 690 690 ...
##  $ Grid_voltage_min                   : num [1:617386] 683 684 686 688 690 ...
##  $ Grid_voltage_max                   : num [1:617386] 685 686 689 690 691 ...
##  $ Grid_voltage_std                   : num [1:617386] 0.7 0.54 0.77 0.49 0.23 ...
##  $ Rotor_speed                        : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Rotor_speed_min                    : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Rotor_speed_max                    : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Rotor_speed_std                    : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Rotor_bearing_temperature          : num [1:617386] 2.4 2.4 2.4 2.4 2.4 ...
##  $ Rotor_bearing_temperature_min      : num [1:617386] 2.4 2.4 2.4 2.4 2.4 ...
##  $ Rotor_bearing_temperature_max      : num [1:617386] 2.4 2.4 2.4 2.4 2.5 ...
##  $ Rotor_bearing_temperature_std      : num [1:617386] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Absolute_wind_direction_c          : num [1:617386] 294 298 322 319 315 ...
##  $ Nacelle_angle_c                    : num [1:617386] 294 294 294 294 294 ...

On peut synthétiser très rapidement les données :

# Per-column summary (quartiles, mean and NA counts for numeric columns).
data %>%
  summary()
##      TARGET          MAC_CODE           Date_time       Pitch_angle      
##  Min.   : -19.48   Length:617386      Min.   :     1   Min.   :-156.180  
##  1st Qu.:  18.62   Class :character   1st Qu.: 41154   1st Qu.:  -1.000  
##  Median : 193.99   Mode  :character   Median : 80145   Median :  -1.000  
##  Mean   : 372.75                      Mean   : 79863   Mean   :  13.012  
##  3rd Qu.: 540.68                      3rd Qu.:119014   3rd Qu.:   5.567  
##  Max.   :2256.06                      Max.   :157680   Max.   : 156.820  
##                                                                          
##  Pitch_angle_min   Pitch_angle_max   Pitch_angle_std   Hub_temperature
##  Min.   :-179.57   Min.   :-146.28   Min.   : 0.0000   Min.   :-4.00  
##  1st Qu.:  -1.01   1st Qu.:  -1.00   1st Qu.: 0.0000   1st Qu.:14.00  
##  Median :  -1.00   Median :  -1.00   Median : 0.0000   Median :19.99  
##  Mean   :  11.80   Mean   :  14.55   Mean   : 0.9488   Mean   :19.15  
##  3rd Qu.:  -0.98   3rd Qu.:  15.90   3rd Qu.: 0.0000   3rd Qu.:24.00  
##  Max.   : 175.93   Max.   : 175.93   Max.   :94.7700   Max.   :37.08  
##                                                                       
##  Hub_temperature_min Hub_temperature_max Hub_temperature_std
##  Min.   :-5.00       Min.   :-3.00       Min.   :0.0000     
##  1st Qu.:14.00       1st Qu.:14.00       1st Qu.:0.0000     
##  Median :19.20       Median :20.00       Median :0.0000     
##  Mean   :18.94       Mean   :19.36       Mean   :0.1069     
##  3rd Qu.:24.00       3rd Qu.:24.00       3rd Qu.:0.2100     
##  Max.   :37.00       Max.   :38.00       Max.   :1.2400     
##                                                             
##  Generator_converter_speed Generator_converter_speed_min
##  Min.   :   0.0            Min.   :   0.0               
##  1st Qu.: 925.1            1st Qu.: 916.5               
##  Median :1175.0            Median :1025.5               
##  Mean   :1065.5            Mean   : 946.7               
##  3rd Qu.:1563.2            3rd Qu.:1364.5               
##  Max.   :1806.6            Max.   :1802.1               
##  NA's   :8064              NA's   :8064                 
##  Generator_converter_speed_max Generator_converter_speed_std Generator_speed  
##  Min.   :   0.0                Min.   :  0.00                Min.   :  -0.15  
##  1st Qu.: 935.3                1st Qu.: 11.99                1st Qu.: 926.94  
##  Median :1318.4                Median : 37.10                Median :1174.54  
##  Mean   :1178.7                Mean   : 62.86                Mean   :1064.35  
##  3rd Qu.:1758.2                3rd Qu.: 85.56                3rd Qu.:1562.67  
##  Max.   :2073.4                Max.   :892.45                Max.   :1803.84  
##  NA's   :8064                  NA's   :8064                                   
##  Generator_speed_min Generator_speed_max Generator_speed_std
##  Min.   : -49.74     Min.   :   0.0      Min.   :  0.00     
##  1st Qu.: 917.35     1st Qu.: 933.8      1st Qu.: 12.18     
##  Median :1024.64     Median :1318.1      Median : 37.18     
##  Mean   : 945.14     Mean   :1178.1      Mean   : 63.15     
##  3rd Qu.:1363.36     3rd Qu.:1758.3      3rd Qu.: 85.83     
##  Max.   :1799.19     Max.   :2076.2      Max.   :887.75     
##                                                             
##  Generator_bearing_1_temperature Generator_bearing_1_temperature_min
##  Min.   :-4.04                   Min.   :-4.10                      
##  1st Qu.:38.20                   1st Qu.:37.50                      
##  Median :42.25                   Median :41.30                      
##  Mean   :40.00                   Mean   :39.20                      
##  3rd Qu.:45.21                   3rd Qu.:44.25                      
##  Max.   :65.94                   Max.   :65.00                      
##                                                                     
##  Generator_bearing_1_temperature_max Generator_bearing_1_temperature_std
##  Min.   : -3.80                      Min.   :0.0000                     
##  1st Qu.: 38.95                      1st Qu.:0.1600                     
##  Median : 43.20                      Median :0.3000                     
##  Mean   : 40.79                      Mean   :0.3622                     
##  3rd Qu.: 46.20                      3rd Qu.:0.4700                     
##  Max.   :116.85                      Max.   :3.4400                     
##                                                                         
##  Generator_bearing_2_temperature Generator_bearing_2_temperature_min
##  Min.   :-4.46                   Min.   :-4.50                      
##  1st Qu.:35.48                   1st Qu.:35.00                      
##  Median :39.20                   Median :38.70                      
##  Mean   :37.19                   Mean   :36.74                      
##  3rd Qu.:42.02                   3rd Qu.:41.50                      
##  Max.   :67.55                   Max.   :67.20                      
##                                                                     
##  Generator_bearing_2_temperature_max Generator_bearing_2_temperature_std
##  Min.   : -4.20                      Min.   :0.0000                     
##  1st Qu.: 35.90                      1st Qu.:0.1000                     
##  Median : 39.70                      Median :0.1800                     
##  Mean   : 37.63                      Mean   :0.2083                     
##  3rd Qu.: 42.50                      3rd Qu.:0.2800                     
##  Max.   :106.95                      Max.   :3.4100                     
##                                                                         
##  Generator_stator_temperature Generator_stator_temperature_min
##  Min.   :-4.24                Min.   :-4.30                   
##  1st Qu.:57.84                1st Qu.:56.60                   
##  Median :60.59                Median :58.95                   
##  Mean   :56.65                Mean   :55.30                   
##  3rd Qu.:63.19                3rd Qu.:61.50                   
##  Max.   :95.00                Max.   :93.85                   
##                                                               
##  Generator_stator_temperature_max Generator_stator_temperature_std
##  Min.   : -4.10                   Min.   :0.0000                  
##  1st Qu.: 58.95                   1st Qu.:0.3400                  
##  Median : 62.30                   Median :0.4900                  
##  Mean   : 58.02                   Mean   :0.5028                  
##  3rd Qu.: 65.00                   3rd Qu.:0.6500                  
##  Max.   :132.40                   Max.   :3.6000                  
##                                                                   
##  Gearbox_bearing_1_temperature Gearbox_bearing_1_temperature_min
##  Min.   :-4.47                 Min.   :-4.50                    
##  1st Qu.:53.64                 1st Qu.:52.90                    
##  Median :61.93                 Median :61.00                    
##  Mean   :57.67                 Mean   :56.83                    
##  3rd Qu.:65.91                 3rd Qu.:64.85                    
##  Max.   :83.56                 Max.   :83.35                    
##                                                                 
##  Gearbox_bearing_1_temperature_max Gearbox_bearing_1_temperature_std
##  Min.   : -4.30                    Min.   : 0.0000                  
##  1st Qu.: 54.30                    1st Qu.: 0.1400                  
##  Median : 62.75                    Median : 0.2900                  
##  Mean   : 58.45                    Mean   : 0.4235                  
##  3rd Qu.: 67.05                    3rd Qu.: 0.5600                  
##  Max.   :119.40                    Max.   :10.9000                  
##                                                                     
##  Gearbox_bearing_2_temperature Gearbox_bearing_2_temperature_min
##  Min.   :-3.80                 Min.   :-3.80                    
##  1st Qu.:53.69                 1st Qu.:53.10                    
##  Median :64.06                 Median :63.20                    
##  Mean   :59.22                 Mean   :58.53                    
##  3rd Qu.:69.10                 3rd Qu.:68.10                    
##  Max.   :80.35                 Max.   :80.30                    
##                                                                 
##  Gearbox_bearing_2_temperature_max Gearbox_bearing_2_temperature_std
##  Min.   : -3.80                    Min.   :0.0000                   
##  1st Qu.: 54.30                    1st Qu.:0.1300                   
##  Median : 64.90                    Median :0.2800                   
##  Mean   : 59.87                    Mean   :0.3809                   
##  3rd Qu.: 70.00                    3rd Qu.:0.5200                   
##  Max.   :128.70                    Max.   :8.9000                   
##                                                                     
##  Gearbox_inlet_temperature Gearbox_inlet_temperature_min
##  Min.   :-6.78             Min.   :-6.80                
##  1st Qu.:45.43             1st Qu.:44.20                
##  Median :51.04             Median :49.70                
##  Mean   :48.43             Mean   :47.33                
##  3rd Qu.:55.60             3rd Qu.:54.75                
##  Max.   :61.66             Max.   :59.95                
##  NA's   :8064              NA's   :8064                 
##  Gearbox_inlet_temperature_max Gearbox_inlet_temperature_std
##  Min.   : -6.60                Min.   : 0.000               
##  1st Qu.: 46.35                1st Qu.: 0.150               
##  Median : 52.30                Median : 0.260               
##  Mean   : 49.48                Mean   : 0.535               
##  3rd Qu.: 56.55                3rd Qu.: 0.510               
##  Max.   :108.90                Max.   :10.650               
##  NA's   :8064                  NA's   :8064                 
##  Gearbox_oil_sump_temperature Gearbox_oil_sump_temperature_min
##  Min.   :-4.30                Min.   :-4.30                   
##  1st Qu.:50.15                1st Qu.:49.80                   
##  Median :55.72                Median :55.10                   
##  Mean   :51.94                Mean   :51.44                   
##  3rd Qu.:57.68                3rd Qu.:57.15                   
##  Max.   :68.58                Max.   :68.40                   
##                                                               
##  Gearbox_oil_sump_temperature_max Gearbox_oil_sump_temperature_std
##  Min.   : -4.10                   Min.   :0.0000                  
##  1st Qu.: 50.50                   1st Qu.:0.1000                  
##  Median : 56.30                   Median :0.1500                  
##  Mean   : 52.41                   Mean   :0.2125                  
##  3rd Qu.: 58.20                   3rd Qu.:0.2700                  
##  Max.   :135.40                   Max.   :3.7300                  
##                                                                   
##  Nacelle_angle    Nacelle_angle_min Nacelle_angle_max Nacelle_angle_std
##  Min.   :  0.00   Min.   :  0.00    Min.   :  0.0     Min.   :  0.000  
##  1st Qu.: 79.66   1st Qu.: 88.88    1st Qu.: 87.9     1st Qu.:  0.000  
##  Median :193.27   Median :198.19    Median :199.7     Median :  0.000  
##  Mean   :175.55   Mean   :184.93    Mean   :186.6     Mean   :  3.241  
##  3rd Qu.:248.03   3rd Qu.:254.00    3rd Qu.:256.9     3rd Qu.:  0.000  
##  Max.   :360.00   Max.   :539.98    Max.   :540.0     Max.   :178.230  
##                                                                        
##  Nacelle_temperature Nacelle_temperature_min Nacelle_temperature_max
##  Min.   :-5.90       Min.   :-6.15           Min.   :-4.80          
##  1st Qu.:21.44       1st Qu.:20.50           1st Qu.:22.50          
##  Median :26.03       Median :25.20           Median :26.90          
##  Mean   :25.13       Mean   :24.36           Mean   :25.88          
##  3rd Qu.:29.91       3rd Qu.:29.30           3rd Qu.:30.65          
##  Max.   :46.26       Max.   :46.10           Max.   :96.70          
##                                                                     
##  Nacelle_temperature_std Absolute_wind_direction Outdoor_temperature
##  Min.   :0.0000          Min.   :  0.00          Min.   :-12.85     
##  1st Qu.:0.1000          1st Qu.: 80.68          1st Qu.:  5.28     
##  Median :0.2300          Median :191.75          Median : 11.78     
##  Mean   :0.4485          Mean   :174.83          Mean   : 11.40     
##  3rd Qu.:0.6200          3rd Qu.:245.81          3rd Qu.: 17.29     
##  Max.   :4.7500          Max.   :360.00          Max.   : 35.63     
##                                                                     
##  Outdoor_temperature_min Outdoor_temperature_max Outdoor_temperature_std
##  Min.   :-13.00          Min.   :-12.70          Min.   :0.0000         
##  1st Qu.:  5.10          1st Qu.:  5.40          1st Qu.:0.0400         
##  Median : 11.60          Median : 11.90          Median :0.0800         
##  Mean   : 11.20          Mean   : 11.59          Mean   :0.1024         
##  3rd Qu.: 17.05          3rd Qu.: 17.50          3rd Qu.:0.1200         
##  Max.   : 35.00          Max.   : 85.50          Max.   :4.1700         
##                                                                         
##  Grid_frequency  Grid_frequency_min Grid_frequency_max Grid_frequency_std
##  Min.   : 0.00   Min.   : 0.00      Min.   : 0.00      Min.   : 0.00000  
##  1st Qu.:49.98   1st Qu.:49.96      1st Qu.:50.01      1st Qu.: 0.01000  
##  Median :49.99   Median :49.97      Median :50.02      Median : 0.01000  
##  Mean   :49.96   Mean   :49.92      Mean   :49.98      Mean   : 0.01649  
##  3rd Qu.:50.00   3rd Qu.:49.98      3rd Qu.:50.03      3rd Qu.: 0.01000  
##  Max.   :50.14   Max.   :50.11      Max.   :56.03      Max.   :25.02000  
##                                                                          
##   Grid_voltage    Grid_voltage_min Grid_voltage_max Grid_voltage_std
##  Min.   :  0.0    Min.   :  0.0    Min.   :  0.0    Min.   :  0.00  
##  1st Qu.:686.7    1st Qu.:683.1    1st Qu.:689.6    1st Qu.:  0.79  
##  Median :692.5    Median :689.4    Median :695.3    Median :  1.27  
##  Mean   :684.8    Mean   :680.5    Mean   :688.5    Mean   :  1.78  
##  3rd Qu.:698.8    3rd Qu.:695.2    3rd Qu.:702.9    3rd Qu.:  2.21  
##  Max.   :756.6    Max.   :753.7    Max.   :760.7    Max.   :370.99  
##  NA's   :101322   NA's   :101322   NA's   :101322   NA's   :101322  
##   Rotor_speed    Rotor_speed_min  Rotor_speed_max Rotor_speed_std 
##  Min.   : 0.00   Min.   : 0.000   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 8.79   1st Qu.: 8.700   1st Qu.: 8.87   1st Qu.:0.1200  
##  Median :11.16   Median : 9.720   Median :12.54   Median :0.3600  
##  Mean   :10.12   Mean   : 8.976   Mean   :11.21   Mean   :0.6023  
##  3rd Qu.:14.87   3rd Qu.:12.950   3rd Qu.:16.76   3rd Qu.:0.8200  
##  Max.   :17.18   Max.   :17.110   Max.   :19.71   Max.   :8.4700  
##                                                                   
##  Rotor_bearing_temperature Rotor_bearing_temperature_min
##  Min.   :-5.20             Min.   :-5.20                
##  1st Qu.:25.60             1st Qu.:25.50                
##  Median :29.60             Median :29.50                
##  Mean   :28.42             Mean   :28.32                
##  3rd Qu.:33.01             3rd Qu.:32.90                
##  Max.   :67.67             Max.   :53.50                
##                                                         
##  Rotor_bearing_temperature_max Rotor_bearing_temperature_std
##  Min.   : -5.1                 Min.   : 0.00000             
##  1st Qu.: 25.7                 1st Qu.: 0.02000             
##  Median : 29.7                 Median : 0.04000             
##  Mean   : 28.5                 Mean   : 0.04733             
##  3rd Qu.: 33.1                 3rd Qu.: 0.07000             
##  Max.   :393.8                 Max.   :98.57000             
##                                                             
##  Absolute_wind_direction_c Nacelle_angle_c 
##  Min.   :  0.00            Min.   :  0.00  
##  1st Qu.: 80.67            1st Qu.: 79.66  
##  Median :191.74            Median :193.27  
##  Mean   :174.83            Mean   :175.54  
##  3rd Qu.:245.81            3rd Qu.:248.03  
##  Max.   :360.00            Max.   :360.00  
##  NA's   :72                NA's   :72
# skimr overview of `data`: per-column missing counts, completeness rate,
# mean/sd, quantiles and an inline histogram for numeric columns.
skim(data)
Data summary
Name data
Number of rows 617386
Number of columns 78
_______________________
Column type frequency:
character 1
numeric 77
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
MAC_CODE 0 1 3 3 0 4 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
TARGET 0 1.00 372.75 468.00 -19.48 18.62 193.99 540.68 2256.06 ▇▂▁▁▁
Date_time 0 1.00 79862.86 45185.11 1.00 41154.00 80145.00 119014.00 157680.00 ▇▇▇▇▇
Pitch_angle 0 1.00 13.01 27.24 -156.18 -1.00 -1.00 5.57 156.82 ▁▁▇▂▁
Pitch_angle_min 0 1.00 11.80 27.05 -179.57 -1.01 -1.00 -0.98 175.93 ▁▁▇▂▁
Pitch_angle_max 0 1.00 14.55 28.01 -146.28 -1.00 -1.00 15.90 175.93 ▁▁▇▁▁
Pitch_angle_std 0 1.00 0.95 4.21 0.00 0.00 0.00 0.00 94.77 ▇▁▁▁▁
Hub_temperature 0 1.00 19.15 6.60 -4.00 14.00 19.99 24.00 37.08 ▁▃▇▇▂
Hub_temperature_min 0 1.00 18.94 6.63 -5.00 14.00 19.20 24.00 37.00 ▁▃▇▇▂
Hub_temperature_max 0 1.00 19.36 6.60 -3.00 14.00 20.00 24.00 38.00 ▁▅▇▇▁
Hub_temperature_std 0 1.00 0.11 0.16 0.00 0.00 0.00 0.21 1.24 ▇▂▁▁▁
Generator_converter_speed 8064 0.99 1065.45 601.02 0.00 925.15 1175.00 1563.16 1806.56 ▆▁▅▆▇
Generator_converter_speed_min 8064 0.99 946.74 570.89 0.00 916.50 1025.54 1364.47 1802.10 ▆▁▇▇▆
Generator_converter_speed_max 8064 0.99 1178.68 628.46 0.00 935.31 1318.39 1758.18 2073.45 ▆▁▆▆▇
Generator_converter_speed_std 8064 0.99 62.86 80.16 0.00 11.99 37.10 85.56 892.45 ▇▁▁▁▁
Generator_speed 0 1.00 1064.35 601.88 -0.15 926.94 1174.54 1562.67 1803.84 ▆▁▅▆▇
Generator_speed_min 0 1.00 945.14 571.96 -49.74 917.35 1024.64 1363.36 1799.19 ▇▁▇▇▆
Generator_speed_max 0 1.00 1178.11 628.94 0.00 933.77 1318.12 1758.34 2076.22 ▆▁▆▆▇
Generator_speed_std 0 1.00 63.15 80.26 0.00 12.18 37.18 85.83 887.75 ▇▁▁▁▁
Generator_bearing_1_temperature 0 1.00 40.00 8.77 -4.04 38.20 42.25 45.21 65.94 ▁▁▂▇▁
Generator_bearing_1_temperature_min 0 1.00 39.20 8.58 -4.10 37.50 41.30 44.25 65.00 ▁▁▂▇▁
Generator_bearing_1_temperature_max 0 1.00 40.79 9.01 -3.80 38.95 43.20 46.20 116.85 ▁▇▆▁▁
Generator_bearing_1_temperature_std 0 1.00 0.36 0.29 0.00 0.16 0.30 0.47 3.44 ▇▁▁▁▁
Generator_bearing_2_temperature 0 1.00 37.19 8.48 -4.46 35.48 39.20 42.02 67.55 ▁▁▆▇▁
Generator_bearing_2_temperature_min 0 1.00 36.74 8.39 -4.50 35.00 38.70 41.50 67.20 ▁▁▆▇▁
Generator_bearing_2_temperature_max 0 1.00 37.63 8.60 -4.20 35.90 39.70 42.50 106.95 ▁▇▇▁▁
Generator_bearing_2_temperature_std 0 1.00 0.21 0.15 0.00 0.10 0.18 0.28 3.41 ▇▁▁▁▁
Generator_stator_temperature 0 1.00 56.65 12.96 -4.24 57.84 60.59 63.19 95.00 ▁▁▁▇▁
Generator_stator_temperature_min 0 1.00 55.30 12.58 -4.30 56.60 58.95 61.50 93.85 ▁▁▁▇▁
Generator_stator_temperature_max 0 1.00 58.02 13.43 -4.10 58.95 62.30 65.00 132.40 ▁▁▇▁▁
Generator_stator_temperature_std 0 1.00 0.50 0.28 0.00 0.34 0.49 0.65 3.60 ▇▂▁▁▁
Gearbox_bearing_1_temperature 0 1.00 57.67 13.66 -4.47 53.64 61.93 65.91 83.56 ▁▁▂▇▃
Gearbox_bearing_1_temperature_min 0 1.00 56.83 13.45 -4.50 52.90 61.00 64.85 83.35 ▁▁▂▇▂
Gearbox_bearing_1_temperature_max 0 1.00 58.45 13.93 -4.30 54.30 62.75 67.05 119.40 ▁▁▇▁▁
Gearbox_bearing_1_temperature_std 0 1.00 0.42 0.43 0.00 0.14 0.29 0.56 10.90 ▇▁▁▁▁
Gearbox_bearing_2_temperature 0 1.00 59.22 14.11 -3.80 53.69 64.06 69.10 80.35 ▁▁▂▅▇
Gearbox_bearing_2_temperature_min 0 1.00 58.53 13.94 -3.80 53.10 63.20 68.10 80.30 ▁▁▂▆▇
Gearbox_bearing_2_temperature_max 0 1.00 59.87 14.32 -3.80 54.30 64.90 70.00 128.70 ▁▂▇▁▁
Gearbox_bearing_2_temperature_std 0 1.00 0.38 0.37 0.00 0.13 0.28 0.52 8.90 ▇▁▁▁▁
Gearbox_inlet_temperature 8064 0.99 48.43 10.15 -6.78 45.43 51.04 55.60 61.66 ▁▁▁▃▇
Gearbox_inlet_temperature_min 8064 0.99 47.33 10.20 -6.80 44.20 49.70 54.75 59.95 ▁▁▁▃▇
Gearbox_inlet_temperature_max 8064 0.99 49.47 10.24 -6.60 46.35 52.30 56.55 108.90 ▁▁▇▁▁
Gearbox_inlet_temperature_std 8064 0.99 0.54 0.79 0.00 0.15 0.26 0.51 10.65 ▇▁▁▁▁
Gearbox_oil_sump_temperature 0 1.00 51.94 9.89 -4.30 50.15 55.72 57.68 68.58 ▁▁▁▃▇
Gearbox_oil_sump_temperature_min 0 1.00 51.44 9.87 -4.30 49.80 55.10 57.15 68.40 ▁▁▁▅▇
Gearbox_oil_sump_temperature_max 0 1.00 52.41 9.96 -4.10 50.50 56.30 58.20 135.40 ▁▃▇▁▁
Gearbox_oil_sump_temperature_std 0 1.00 0.21 0.18 0.00 0.10 0.15 0.27 3.73 ▇▁▁▁▁
Nacelle_angle 0 1.00 175.55 96.77 0.00 79.66 193.27 248.03 360.00 ▇▃▇▇▃
Nacelle_angle_min 0 1.00 184.93 105.11 0.00 88.88 198.19 254.00 539.98 ▆▇▇▂▁
Nacelle_angle_max 0 1.00 186.56 106.35 0.00 87.90 199.73 256.92 540.00 ▆▇▇▂▁
Nacelle_angle_std 0 1.00 3.24 16.36 0.00 0.00 0.00 0.00 178.23 ▇▁▁▁▁
Nacelle_temperature 0 1.00 25.13 6.95 -5.90 21.44 26.03 29.91 46.26 ▁▁▆▇▁
Nacelle_temperature_min 0 1.00 24.36 7.01 -6.15 20.50 25.20 29.30 46.10 ▁▂▇▇▁
Nacelle_temperature_max 0 1.00 25.88 6.94 -4.80 22.50 26.90 30.65 96.70 ▁▇▁▁▁
Nacelle_temperature_std 0 1.00 0.45 0.52 0.00 0.10 0.23 0.62 4.75 ▇▁▁▁▁
Absolute_wind_direction 0 1.00 174.83 96.09 0.00 80.68 191.75 245.81 360.00 ▇▃▇▇▃
Outdoor_temperature 0 1.00 11.40 8.05 -12.85 5.28 11.78 17.29 35.63 ▁▅▇▅▁
Outdoor_temperature_min 0 1.00 11.20 7.99 -13.00 5.10 11.60 17.05 35.00 ▁▅▇▆▁
Outdoor_temperature_max 0 1.00 11.59 8.12 -12.70 5.40 11.90 17.50 85.50 ▃▇▁▁▁
Outdoor_temperature_std 0 1.00 0.10 0.10 0.00 0.04 0.08 0.12 4.17 ▇▁▁▁▁
Grid_frequency 0 1.00 49.96 1.24 0.00 49.98 49.99 50.00 50.14 ▁▁▁▁▇
Grid_frequency_min 0 1.00 49.92 1.56 0.00 49.96 49.97 49.98 50.11 ▁▁▁▁▇
Grid_frequency_max 0 1.00 49.98 1.36 0.00 50.01 50.02 50.03 56.03 ▁▁▁▁▇
Grid_frequency_std 0 1.00 0.02 0.31 0.00 0.01 0.01 0.01 25.02 ▇▁▁▁▁
Grid_voltage 101322 0.84 684.80 57.07 0.00 686.71 692.46 698.82 756.57 ▁▁▁▁▇
Grid_voltage_min 101322 0.84 680.55 58.16 0.00 683.14 689.43 695.24 753.73 ▁▁▁▁▇
Grid_voltage_max 101322 0.84 688.53 58.19 0.00 689.56 695.29 702.88 760.68 ▁▁▁▁▇
Grid_voltage_std 101322 0.84 1.78 4.55 0.00 0.79 1.27 2.21 370.99 ▇▁▁▁▁
Rotor_speed 0 1.00 10.12 5.73 0.00 8.79 11.16 14.87 17.18 ▆▁▅▆▇
Rotor_speed_min 0 1.00 8.98 5.44 0.00 8.70 9.72 12.95 17.11 ▆▁▇▆▆
Rotor_speed_max 0 1.00 11.21 6.00 0.00 8.87 12.54 16.76 19.71 ▆▁▆▆▇
Rotor_speed_std 0 1.00 0.60 0.76 0.00 0.12 0.36 0.82 8.47 ▇▁▁▁▁
Rotor_bearing_temperature 0 1.00 28.42 7.26 -5.20 25.60 29.60 33.01 67.67 ▁▂▇▁▁
Rotor_bearing_temperature_min 0 1.00 28.32 7.27 -5.20 25.50 29.50 32.90 53.50 ▁▁▇▇▁
Rotor_bearing_temperature_max 0 1.00 28.50 7.29 -5.10 25.70 29.70 33.10 393.80 ▇▁▁▁▁
Rotor_bearing_temperature_std 0 1.00 0.05 0.13 0.00 0.02 0.04 0.07 98.57 ▇▁▁▁▁
Absolute_wind_direction_c 72 1.00 174.83 96.09 0.00 80.67 191.74 245.81 360.00 ▇▃▇▇▃
Nacelle_angle_c 72 1.00 175.54 96.77 0.00 79.66 193.27 248.03 360.00 ▇▃▇▇▃

On constate qu’il existe des données manquantes.

1.2 Analyse des données manquantes

On comptabilise les données manquantes :

# Number of missing values in each column of `data` (named integer vector).
vapply(data, function(col) sum(is.na(col)), integer(1))
##                              TARGET                            MAC_CODE 
##                                   0                                   0 
##                           Date_time                         Pitch_angle 
##                                   0                                   0 
##                     Pitch_angle_min                     Pitch_angle_max 
##                                   0                                   0 
##                     Pitch_angle_std                     Hub_temperature 
##                                   0                                   0 
##                 Hub_temperature_min                 Hub_temperature_max 
##                                   0                                   0 
##                 Hub_temperature_std           Generator_converter_speed 
##                                   0                                8064 
##       Generator_converter_speed_min       Generator_converter_speed_max 
##                                8064                                8064 
##       Generator_converter_speed_std                     Generator_speed 
##                                8064                                   0 
##                 Generator_speed_min                 Generator_speed_max 
##                                   0                                   0 
##                 Generator_speed_std     Generator_bearing_1_temperature 
##                                   0                                   0 
## Generator_bearing_1_temperature_min Generator_bearing_1_temperature_max 
##                                   0                                   0 
## Generator_bearing_1_temperature_std     Generator_bearing_2_temperature 
##                                   0                                   0 
## Generator_bearing_2_temperature_min Generator_bearing_2_temperature_max 
##                                   0                                   0 
## Generator_bearing_2_temperature_std        Generator_stator_temperature 
##                                   0                                   0 
##    Generator_stator_temperature_min    Generator_stator_temperature_max 
##                                   0                                   0 
##    Generator_stator_temperature_std       Gearbox_bearing_1_temperature 
##                                   0                                   0 
##   Gearbox_bearing_1_temperature_min   Gearbox_bearing_1_temperature_max 
##                                   0                                   0 
##   Gearbox_bearing_1_temperature_std       Gearbox_bearing_2_temperature 
##                                   0                                   0 
##   Gearbox_bearing_2_temperature_min   Gearbox_bearing_2_temperature_max 
##                                   0                                   0 
##   Gearbox_bearing_2_temperature_std           Gearbox_inlet_temperature 
##                                   0                                8064 
##       Gearbox_inlet_temperature_min       Gearbox_inlet_temperature_max 
##                                8064                                8064 
##       Gearbox_inlet_temperature_std        Gearbox_oil_sump_temperature 
##                                8064                                   0 
##    Gearbox_oil_sump_temperature_min    Gearbox_oil_sump_temperature_max 
##                                   0                                   0 
##    Gearbox_oil_sump_temperature_std                       Nacelle_angle 
##                                   0                                   0 
##                   Nacelle_angle_min                   Nacelle_angle_max 
##                                   0                                   0 
##                   Nacelle_angle_std                 Nacelle_temperature 
##                                   0                                   0 
##             Nacelle_temperature_min             Nacelle_temperature_max 
##                                   0                                   0 
##             Nacelle_temperature_std             Absolute_wind_direction 
##                                   0                                   0 
##                 Outdoor_temperature             Outdoor_temperature_min 
##                                   0                                   0 
##             Outdoor_temperature_max             Outdoor_temperature_std 
##                                   0                                   0 
##                      Grid_frequency                  Grid_frequency_min 
##                                   0                                   0 
##                  Grid_frequency_max                  Grid_frequency_std 
##                                   0                                   0 
##                        Grid_voltage                    Grid_voltage_min 
##                              101322                              101322 
##                    Grid_voltage_max                    Grid_voltage_std 
##                              101322                              101322 
##                         Rotor_speed                     Rotor_speed_min 
##                                   0                                   0 
##                     Rotor_speed_max                     Rotor_speed_std 
##                                   0                                   0 
##           Rotor_bearing_temperature       Rotor_bearing_temperature_min 
##                                   0                                   0 
##       Rotor_bearing_temperature_max       Rotor_bearing_temperature_std 
##                                   0                                   0 
##           Absolute_wind_direction_c                     Nacelle_angle_c 
##                                  72                                  72

Les variables présentant plus de 1% de valeurs manquantes sont :

# Proportion of missing values per column of `data`.
don_manq <- vapply(data, function(col) mean(is.na(col)), numeric(1))

# Show only the columns whose missing share exceeds 1%.
don_manq[don_manq > 0.01]
##     Generator_converter_speed Generator_converter_speed_min 
##                    0.01306152                    0.01306152 
## Generator_converter_speed_max Generator_converter_speed_std 
##                    0.01306152                    0.01306152 
##     Gearbox_inlet_temperature Gearbox_inlet_temperature_min 
##                    0.01306152                    0.01306152 
## Gearbox_inlet_temperature_max Gearbox_inlet_temperature_std 
##                    0.01306152                    0.01306152 
##                  Grid_voltage              Grid_voltage_min 
##                    0.16411451                    0.16411451 
##              Grid_voltage_max              Grid_voltage_std 
##                    0.16411451                    0.16411451

Avec naniar :

# naniar missing-data plot over the variables of `data`.
data %>%
  gg_miss_var()

# naniar missing-data plot with MAC_CODE as the grouping factor.
data %>%
  gg_miss_fct(fct = MAC_CODE)

On constate que les variables Grid_voltage, Grid_voltage_min, Grid_voltage_max et Grid_voltage_std présentent beaucoup de données manquantes (environ 16%) ; on analyse ces variables pour décider de leur retrait ou de leur imputation.

# Density-scaled histogram of Grid_voltage.
# `after_stat(density)` replaces the deprecated `..density..` notation.
ggplot() +
  geom_histogram(
    data = data,
    aes(x = Grid_voltage, y = after_stat(density)),
    fill = "steelblue"
  ) +
  labs(y = "Fréquence", title = "Histogramme")

# TARGET against Grid_voltage, first as a scatter plot...
ggplot(data, aes(x = Grid_voltage, y = TARGET)) +
  geom_point()

# ...then as hexagonal bins.
ggplot(data, aes(x = Grid_voltage, y = TARGET)) +
  stat_binhex()

Cette analyse nous incite à retirer ces variables dans une première approche :

# Remove the four Grid_voltage columns from `data`.
data <- select(
  data,
  -all_of(c(
    "Grid_voltage", "Grid_voltage_min",
    "Grid_voltage_max", "Grid_voltage_std"
  ))
)

On peut également remarquer que les couples de variables Absolute_wind_direction-Absolute_wind_direction_c et Nacelle_angle-Nacelle_angle_c sont quasiment identiques (seules des valeurs manquantes les séparent) ; on n’en conserve qu’une sur deux, à savoir celle ne présentant pas de données manquantes (choix fait par hypothèse : il faudrait en vérifier la pertinence auprès du « métier ») :

# Remove the two `_c` columns (the ones that contain missing values).
data <- select(
  data,
  -all_of(c("Absolute_wind_direction_c", "Nacelle_angle_c"))
)

On retire les observations pour lesquelles il subsiste des données manquantes :

# Keep only the rows of `data` that contain no NA.
data <- data %>%
  na.omit()

2 Restriction du champ d’étude

On travaille ici uniquement sur la turbine n°3 :

# Restrict the data to the rows whose MAC_CODE is "WT3".
dataWT3 <- filter(data, MAC_CODE == "WT3")

# TARGET against Rotor_speed for that subset: scatter plot...
ggplot(dataWT3, aes(x = Rotor_speed, y = TARGET)) +
  geom_point()

# ...and hexagonal bins.
ggplot(dataWT3, aes(x = Rotor_speed, y = TARGET)) +
  stat_binhex()

On retire les puissances inférieures à 100.

# Keep only the rows where TARGET is at least 100.
dataWT3 <- filter(dataWT3, TARGET >= 100)

On travaille uniquement sur une donnée par heure (les mesures étant espacées de 10 minutes, on conserve une observation sur six) :

# Keep the rows whose Date_time index is a multiple of 6.
dataWT3h <- filter(dataWT3, Date_time %% 6 == 0)

# TARGET along the Date_time index.
ggplot(dataWT3h, aes(x = Date_time, y = TARGET)) +
  geom_line()

Pour représenter la série à l’aide de dygraph, on crée une pseudo-date à partir du 1er janvier 2000 :

# Build a pseudo timestamp `t`: Date_time = 1 maps to 2000-01-01 00:00:00 and
# each further step adds 10 minutes (10 * 60 seconds).
# The origin is a POSIXct in UTC so the result does not depend on the machine's
# local time zone and shows no daylight-saving jumps.
dataWT3h <- dataWT3h %>%
  mutate(
    t = as.POSIXct("2000-01-01 00:00:00", tz = "UTC") +
      (Date_time - 1) * 10 * 60
  )

# TARGET as an xts series indexed by the pseudo timestamp `t`.
dataWT3h_xts <- xts(x = dataWT3h$TARGET, order.by = dataWT3h$t)

# Interactive dygraph of the series with a range selector.
dyRangeSelector(dygraph(dataWT3h_xts))

On ne conserve que la variable cible (TARGET) et les régresseurs :

# Remove the identifier and time columns; TARGET and the other columns stay.
dataWT3h <- select(dataWT3h, -all_of(c("MAC_CODE", "Date_time", "t")))

3 Analyse descriptive (succincte)

# Variables shown in the pairwise plot and the correlation plot.
vars_corr <- c(
  "TARGET", "Pitch_angle", "Hub_temperature", "Generator_converter_speed",
  "Generator_speed", "Generator_bearing_1_temperature",
  "Generator_bearing_2_temperature", "Generator_stator_temperature",
  "Gearbox_bearing_1_temperature", "Gearbox_bearing_2_temperature",
  "Gearbox_inlet_temperature", "Gearbox_oil_sump_temperature",
  "Nacelle_angle", "Nacelle_temperature", "Absolute_wind_direction",
  "Outdoor_temperature", "Grid_frequency", "Rotor_speed",
  "Rotor_bearing_temperature"
)

# Disabled pairwise plot, kept for reference:
# ggpairs(dataWT3h, columns = vars_corr)

# Correlation matrix of those variables, drawn as circles (upper triangle).
mat_cor <- cor(dataWT3h[, vars_corr])
corrplot(mat_cor, method = "circle", type = "upper")

# Density-scaled histogram of TARGET.
# `after_stat(density)` replaces the deprecated `..density..` notation.
# The former `fill = "Turbine"` label is dropped: no fill aesthetic is mapped,
# so it was never displayed.
ggplot() +
  geom_histogram(
    data = dataWT3h,
    aes(x = TARGET, y = after_stat(density)),
    alpha = 0.5,
    position = "identity"
  ) +
  labs(x = "TARGET", y = "Fréquence", title = "Histogramme")

# TARGET against Rotor_speed on the reduced data set: scatter plot...
ggplot(dataWT3h, aes(x = Rotor_speed, y = TARGET)) +
  geom_point()

# ...and hexagonal bins.
ggplot(dataWT3h, aes(x = Rotor_speed, y = TARGET)) +
  stat_binhex()

On peut également utiliser le package DataExplorer :

# DataExplorer scatter plots of the columns of `dataWT3h` against TARGET.
dataWT3h %>%
  plot_scatterplot(by = "TARGET")

#create_report(dataWT3h,y="TARGET")

4 Modélisation

4.1 Création d’une base d’apprentissage et d’une base de test

# Random split of `dataWT3h` into a training set (2/3) and a test set (1/3).
n <- nrow(dataWT3h)

# Share of the observations reserved for the test set.
part_test <- 1/3
n_train <- floor(n * (1 - part_test))

# Fixed seed so the split is reproducible.
set.seed(123)
# sample.int(n, k) draws the same indices as sample(1:n, k) but stays correct
# when n is 0 or 1.
obs_train <- sample.int(n, n_train)
# Explicit complement: with negative indexing, an empty `obs_train` would
# select zero rows instead of all of them.
obs_test <- setdiff(seq_len(n), obs_train)

dataWT3h_train <- dataWT3h[obs_train, ]
dataWT3h_test <- dataWT3h[obs_test, ]